
# Clear the global workspace, including hidden (dot-prefixed) objects, so the
# script always runs against freshly loaded inputs.
rm(list = ls(all.names = TRUE))

# Substation records. Every column is read as character; the columns that are
# needed as numbers are converted inside subextract().
theslsubmaster <- read.csv(
  "C:\\Users\\Stelios Fourakis\\Documents\\THESL\\FINAL\\theslcompsubfinal0115.csv",
  header = TRUE,
  colClasses = "character"
)

# Lookup table whose FERCCODE, PEGID and COMPANY columns are used by
# subextract() to translate respondent ids into PEG ids and company names.
mapfilein <- read.csv(
  "C:\\Users\\Stelios Fourakis\\Documents\\THESL\\FINAL\\transmap3.csv",
  header = TRUE,
  colClasses = "character"
)

# Summarise substation capacity per company (PEG id) and report year.
#
# Args:
#   allsubdata: data frame of substation records with the columns
#     report_year, respondent_id, prmry_voltage, scndry_voltage and
#     substn_capacity. They may be character; they are coerced to numeric.
#   mapfile: data frame with the columns FERCCODE, PEGID and COMPANY, used to
#     translate respondent_id into a PEG id and a company name.
#
# Returns:
#   A data frame with one row per pegid/report_year combination found in
#   allsubdata, ordered by pegid and year, with the columns
#     pegid, year (both character), company,
#     mvagt50 - summed capacity of substations with primary voltage >= 50,
#     mvadx   - summed capacity of all substations.
#   Combinations with no qualifying substation get 0. Records whose
#   respondent_id is not found in mapfile (or whose year is missing) are not
#   summed, so their totals stay 0.
subextract <- function(allsubdata, mapfile) {
  numeric_cols <- c("report_year", "respondent_id", "prmry_voltage",
                    "scndry_voltage", "substn_capacity")
  for (col in numeric_cols) {
    allsubdata[[col]] <- as.numeric(allsubdata[[col]])
  }

  # Attach the PEG id and build a "<pegid>_<year>" key for each record.
  peglocs <- match(allsubdata$respondent_id, mapfile$FERCCODE)
  allsubdata$pegid <- mapfile$PEGID[peglocs]
  allsubdata$realid <- paste(allsubdata$pegid, allsubdata$report_year,
                             sep = "_")

  # A company-year with any primary voltage strictly between 0 and 1 has all
  # of its voltages multiplied by 1000.
  # NOTE(review): presumably those filings report voltage in a unit 1000x
  # larger than the rest (e.g. MV instead of kV) - confirm.
  unitlocs <- which(allsubdata$prmry_voltage < 1 &
                      allsubdata$prmry_voltage > 0)
  unitcompanies <- unique(allsubdata$realid[unitlocs])
  rescale <- allsubdata$realid %in% unitcompanies
  allsubdata$prmry_voltage[rescale] <- 1000 * allsubdata$prmry_voltage[rescale]
  allsubdata$scndry_voltage[rescale] <-
    1000 * allsubdata$scndry_voltage[rescale]

  # Sum capacity by pegid and year. aggregate() drops rows with a missing
  # grouping value and stops on zero rows, so filter first and return an
  # empty table instead of failing when nothing is left.
  sum_capacity <- function(subs, value_name) {
    subs <- subs[!is.na(subs$pegid) & !is.na(subs$report_year), , drop = FALSE]
    if (nrow(subs) == 0) {
      totals <- data.frame(pegid = character(0), year = numeric(0),
                           value = numeric(0), stringsAsFactors = FALSE)
    } else {
      totals <- aggregate(subs$substn_capacity,
                          by = list(subs$pegid, subs$report_year),
                          FUN = sum)
    }
    names(totals) <- c("pegid", "year", value_name)
    totals
  }

  is_gt50 <- which(allsubdata$prmry_voltage >= 50)
  totals <- list(
    mvadx = sum_capacity(allsubdata, "mvadx"),
    mvagt50 = sum_capacity(allsubdata[is_gt50, , drop = FALSE], "mvagt50")
  )

  # One output row per company-year key. The key is split back into its parts
  # by position, which assumes report_year prints as exactly four characters.
  complist <- unique(allsubdata$realid)
  keylen <- nchar(complist)
  pegids <- substr(complist, 1, keylen - 5)
  outdata <- data.frame(
    pegid = pegids,
    year = substr(complist, keylen - 3, keylen),
    company = mapfile$COMPANY[match(pegids, mapfile$PEGID)],
    mvagt50 = rep(0, length(complist)),
    mvadx = rep(0, length(complist)),
    stringsAsFactors = FALSE
  )

  for (col in names(totals)) {
    # Copy each total into its column; keys without a total keep the 0 default.
    agg <- totals[[col]]
    locs <- match(complist, paste(agg$pegid, agg$year, sep = "_"))
    found <- !is.na(locs)
    outdata[[col]][found] <- agg[[col]][locs[found]]

    # Totals above 100000 are scaled down by 1000.
    # NOTE(review): presumably these filings report kVA rather than MVA -
    # confirm the threshold with the data owner.
    kvalocs <- which(outdata[[col]] > 100000)
    outdata[[col]][kvalocs] <- outdata[[col]][kvalocs] / 1000
  }

  outdata[order(outdata$pegid, outdata$year), , drop = FALSE]
}

# Build the per-company, per-year capacity summary from the loaded inputs.
theslsubs <- subextract(theslsubmaster, mapfilein)

# Save the summary as CSV, leaving out the data frame's row names.
write.csv(
  theslsubs,
  "C:\\Users\\Stelios Fourakis\\Documents\\THESL\\FINAL\\theslsubdatafinal0115.csv",
  row.names = FALSE
)
